# -*- coding: UTF-8 -*-
import scrapy
# Python 2 only: force the interpreter-wide default string encoding to UTF-8
# so implicit str <-> unicode conversions do not fall back to ASCII.
# `site` removes sys.setdefaultencoding at startup; reload(sys) brings it
# back, so the order of the next two statements matters.
import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )

import MySQLdb
import datetime

# Local calendar date (YYYY-MM-DD), captured once when this module is imported.
nowDate = datetime.date.today().isoformat()

# Shared MySQL connection, opened as a side effect of importing this module.
conn = MySQLdb.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    passwd='root',
    db='test',
    charset='utf8',
)

# Single cursor on that connection, shared by everything in this module.
cur = conn.cursor()


class DmozSpider(scrapy.Spider):
    """Store the links found on shanghai.jianzhimao.com listing pages.

    ``start_urls`` covers ``index1.html`` .. ``index10.html`` under three
    listing paths (``dbx_zbx_0``, ``dbx_zbx_0_salary``, ``dbx_zbx_0_click``).
    ``parse`` inserts every link it finds into the ``newUrl`` table through
    the module-level ``cur`` / ``conn`` MySQL handles.
    """

    name = "dmoz"
    # NOTE(review): this does not match the host used in start_urls. parse()
    # yields no follow-up requests, so the filter presumably never applies --
    # confirm before adding any.
    allowed_domains = ["dmoz.org"]

    # Site root, shared by the start URLs and by the links built in parse().
    base_url = 'http://shanghai.jianzhimao.com'

    start_urls = []
    for page in range(1, 11):
        for section in ('dbx_zbx_0', 'dbx_zbx_0_salary', 'dbx_zbx_0_click'):
            start_urls.append(
                '%s/%s/index%d.html' % (base_url, section, page)
            )
    # Do not leave the loop temporaries behind as class attributes.
    del page, section

    def parse(self, response):
        """Insert each link of one listing page into ``newUrl``.

        Every ``<li>`` under ``ul#content_list_wrap`` contributes the href of
        its first direct ``<a>`` child, prefixed with ``base_url``. Each URL
        is inserted and committed on its own; when the insert fails the
        transaction is rolled back and the URL is reported with the
        ``' issit'`` suffix instead.

        :param response: the Scrapy response for one listing page.
        :returns: ``None``; no items or follow-up requests are produced.
        """
        for sel in response.xpath('//ul[@id="content_list_wrap"]/li'):
            hrefs = sel.xpath('a/@href').extract()
            if not hrefs:
                # An <li> without a direct <a href> child would otherwise
                # raise IndexError and abort the rest of the page.
                continue
            url = self.base_url + hrefs[0]

            try:
                # Parameterized query: scraped text must never be spliced
                # into the SQL string itself.
                cur.execute('INSERT INTO newUrl(url) VALUES (%s)', (url,))
                conn.commit()
            except MySQLdb.Error:
                # Only database failures are treated as "could not store";
                # anything else (e.g. KeyboardInterrupt) propagates.
                conn.rollback()
                print(url + '-' + str(nowDate) + ' issit')
            else:
                print(url + '-' + str(nowDate))
            
            
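# NOTE: the cleanup calls below are intentionally left disabled. At module
# level they would run at import time, i.e. before parse() is ever called,
# which is presumably why they were commented out.
#cur.close()
#conn.commit()
#conn.close()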